- departure delay by city
# read in dataset
library(plotly)
## Loading required package: ggplot2
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
packageVersion('plotly')
## [1] '4.8.0'
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Load the flight records used throughout this script.
# NOTE(review): absolute, machine-specific path — confirm before running
# on another machine.
df <- read.csv("C:/Users/ziwan/Desktop/2018 Fall Courses/BST 260/Project/201701.csv")

# Mean departure delay (minutes) for every origin city.
city_delay <- summarize(
  group_by(df, ORIGIN_CITY_NAME),
  mean_delay = mean(DEP_DELAY_NEW, na.rm = TRUE)
)

# Split ORIGIN_CITY_NAME at the comma; the first piece, whitespace-trimmed,
# becomes the join key `name`.
library(splitstackshape)
city_delay <- cSplit(city_delay, "ORIGIN_CITY_NAME", sep = ",")
city_delay <- mutate(
  city_delay,
  name = trimws(as.character(ORIGIN_CITY_NAME_1))
)

# City coordinates, with the key trimmed the same way so the join can match.
coordinate <- read.csv("https://raw.githubusercontent.com/plotly/datasets/master/2014_us_cities.csv")
coordinate <- mutate(coordinate, name = trimws(as.character(name)))

# Attach coordinates, then collapse to one row per city name.
# NOTE(review): the key is the city name alone (the part after the comma is
# discarded), so rows sharing a name are averaged together here — confirm
# this is acceptable for same-named cities.
merged_city_delay <- left_join(city_delay, coordinate, by = "name") %>%
  group_by(name) %>%
  summarize(
    mean_delay = mean(mean_delay, na.rm = TRUE),
    lat = mean(lat),
    lon = mean(lon)
  )
# draw the plot by cities
# Bin each city's mean delay into quartile groups for the colour scale.
# quantile() returns five break points (0%, 25%, 50%, 75%, 100%), which
# delimit FOUR intervals, so exactly four labels are supplied.
# include.lowest = TRUE keeps the city with the minimum delay in the first
# bin; without it cut() uses (lo, hi] intervals and that city becomes NA.
merged_city_delay$q <- with(
  merged_city_delay,
  cut(
    mean_delay,
    breaks = quantile(mean_delay, na.rm = TRUE),
    labels = paste(c("1st", "2nd", "3rd", "4th"), "Quantile"),
    include.lowest = TRUE,
    ordered_result = TRUE
  )
)
# Geo layout: US-only scope on an Albers USA projection, gray land with
# white state and country borders.
g <- list(
  scope = "usa",
  projection = list(type = "albers usa"),
  showland = TRUE,
  landcolor = toRGB("gray85"),
  subunitwidth = 1,
  countrywidth = 1,
  subunitcolor = toRGB("white"),
  countrycolor = toRGB("white")
)

# Bubble map: one marker per city, sized by mean delay and coloured by
# quantile group.
# The hover text refers to the plot's own data columns (`name`, `mean_delay`)
# rather than `merged_city_delay$...`, so it cannot pick up a different
# data frame if `merged_city_delay` is rebound before the plot is rendered.
p <- plot_geo(
  merged_city_delay,
  locationmode = "USA-states",
  sizes = c(1, 250)
) %>%
  add_markers(
    x = ~lon, y = ~lat, size = ~mean_delay, color = ~q, hoverinfo = "text",
    text = ~paste(name, "<br />", mean_delay, "minutes")
  ) %>%
  layout(
    title = "2017 January average departure delay (minutes) by city",
    geo = g
  )
p
## Warning: Ignoring 92 observations
## Warning: `line.width` does not currently support multiple values.
## Warning: `line.width` does not currently support multiple values.
## Warning: `line.width` does not currently support multiple values.
## Warning: `line.width` does not currently support multiple values.
- arrival delay by city
# calculate mean arrival delay minutes by destination city
city_delay <- df %>%
  group_by(DEST_CITY_NAME) %>%
  summarize(mean_delay = mean(ARR_DELAY_NEW, na.rm = TRUE))

# Split DEST_CITY_NAME at the comma; the first piece, whitespace-trimmed,
# becomes the join key `name`.
library(splitstackshape)
city_delay <- cSplit(city_delay, "DEST_CITY_NAME", sep = ",")
city_delay <- city_delay %>%
  mutate(name = trimws(as.character(DEST_CITY_NAME_1)))

# add the coordinates of cities
# `coordinate` (already read and name-trimmed for the departure map above) is
# reused as-is instead of downloading and trimming the same CSV a second time.
merged_city_delay <- left_join(city_delay, coordinate, by = "name")

# Collapse to one row per city name.
# NOTE(review): the key is the city name alone, so rows sharing a name are
# averaged together — confirm this is acceptable for same-named cities.
merged_city_delay <- merged_city_delay %>%
  group_by(name) %>%
  summarize(
    mean_delay = mean(mean_delay, na.rm = TRUE),
    lat = mean(lat),
    lon = mean(lon)
  )
# draw the plot by cities
# Bin each city's mean delay into quartile groups for the colour scale.
# quantile() returns five break points (0%, 25%, 50%, 75%, 100%), which
# delimit FOUR intervals, so exactly four labels are supplied.
# include.lowest = TRUE keeps the city with the minimum delay in the first
# bin; without it cut() uses (lo, hi] intervals and that city becomes NA.
merged_city_delay$q <- with(
  merged_city_delay,
  cut(
    mean_delay,
    breaks = quantile(mean_delay, na.rm = TRUE),
    labels = paste(c("1st", "2nd", "3rd", "4th"), "Quantile"),
    include.lowest = TRUE,
    ordered_result = TRUE
  )
)
# Geo layout: US-only scope on an Albers USA projection, gray land with
# white state and country borders.
g <- list(
  scope = "usa",
  projection = list(type = "albers usa"),
  showland = TRUE,
  landcolor = toRGB("gray85"),
  subunitwidth = 1,
  countrywidth = 1,
  subunitcolor = toRGB("white"),
  countrycolor = toRGB("white")
)

# Bubble map: one marker per city, sized by mean delay and coloured by
# quantile group.
# The hover text refers to the plot's own data columns (`name`, `mean_delay`)
# rather than `merged_city_delay$...`, so it always describes the rows that
# are actually plotted, even if `merged_city_delay` is rebound later.
p <- plot_geo(
  merged_city_delay,
  locationmode = "USA-states",
  sizes = c(1, 250)
) %>%
  add_markers(
    x = ~lon, y = ~lat, size = ~mean_delay, color = ~q, hoverinfo = "text",
    text = ~paste(name, "<br />", mean_delay, "minutes")
  ) %>%
  layout(
    title = "2017 January average arrival delay (minutes) by city",
    geo = g
  )
p
## Warning: Ignoring 91 observations
## Warning: `line.width` does not currently support multiple values.
## Warning: `line.width` does not currently support multiple values.
## Warning: `line.width` does not currently support multiple values.
## Warning: `line.width` does not currently support multiple values.